{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Info\n",
    "\n",
    "14-cancer  gene expression data. 16,063 genes, 144 training samples,\n",
    "54 test samples. \n",
    "\n",
    "One gene per row, one sample per column\n",
    "\n",
    "Cancer classes are labelled as follows:\n",
    "\n",
    "1.  breast\n",
    "2.  prostate\n",
    "3.  lung\n",
    "4.  colorectal\n",
    "5.  lymphoma\n",
    "6.  bladder\n",
    "7.  melanoma\n",
    "8.  uterus\n",
    "9.  leukemia\n",
    "10. renal\n",
    "11. pancreas\n",
    "12. ovary\n",
    "13. meso\n",
    "14. cns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading all the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Import all the libraries\n",
    "import pandas as pd\n",
    "import io\n",
    "import requests\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Url for all the datasets \n",
    "xtrainUrl = 'http://statweb.stanford.edu/~tibs/ElemStatLearn/datasets/14cancer.xtrain'\n",
    "ytrainUrl = 'http://statweb.stanford.edu/~tibs/ElemStatLearn/datasets/14cancer.ytrain'\n",
    "xtestUrl = 'http://statweb.stanford.edu/~tibs/ElemStatLearn/datasets/14cancer.xtest'\n",
    "ytestUrl='http://statweb.stanford.edu/~tibs/ElemStatLearn/datasets/14cancer.ytest'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load xtrain data.\n",
    "# The file is whitespace-delimited and has no header row, so say so explicitly:\n",
    "# the default comma/header parsing collapses everything into one column and\n",
    "# consumes the first gene row as the column name.\n",
    "# Orientation is kept as in the file (genes in rows, samples in columns).\n",
    "response = requests.get(xtrainUrl)\n",
    "response.raise_for_status()  # fail loudly instead of parsing an HTTP error page\n",
    "xtrain = pd.read_csv(io.StringIO(response.content.decode('utf-8')), sep=r'\\s+', header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Load ytrain data.\n",
    "# The labels are whitespace-separated with no header row; with the default\n",
    "# parsing the label line itself becomes the column name and the frame is empty.\n",
    "# Read this way, each file line becomes one row of integer class labels.\n",
    "response = requests.get(ytrainUrl)\n",
    "response.raise_for_status()  # fail loudly instead of parsing an HTTP error page\n",
    "ytrain = pd.read_csv(io.StringIO(response.content.decode('utf-8')), sep=r'\\s+', header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load xtest data.\n",
    "# The file is whitespace-delimited and has no header row, so say so explicitly:\n",
    "# the default comma/header parsing collapses everything into one column and\n",
    "# consumes the first gene row as the column name.\n",
    "# Orientation is kept as in the file (genes in rows, samples in columns).\n",
    "response = requests.get(xtestUrl)\n",
    "response.raise_for_status()  # fail loudly instead of parsing an HTTP error page\n",
    "xtest = pd.read_csv(io.StringIO(response.content.decode('utf-8')), sep=r'\\s+', header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load ytest data.\n",
    "# The labels are whitespace-separated with no header row; with the default\n",
    "# parsing the label line itself becomes the column name and the frame is empty.\n",
    "# Read this way, each file line becomes one row of integer class labels.\n",
    "response = requests.get(ytestUrl)\n",
    "response.raise_for_status()  # fail loudly instead of parsing an HTTP error page\n",
    "ytest = pd.read_csv(io.StringIO(response.content.decode('utf-8')), sep=r'\\s+', header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(16062, 1)\n",
      "(0, 1)\n",
      "(16062, 1)\n",
      "(0, 1)\n"
     ]
    }
   ],
   "source": [
    "# Report the dimensions of each loaded table, one per line\n",
    "for table in (xtrain, ytrain, xtest, ytest):\n",
    "    print(table.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Empty DataFrame\n",
      "Columns: [ 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 10 10 10 10 10 10 10 10 11 11 11 11 11 11 11 11 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14]\n",
      "Index: []\n",
      "Empty DataFrame\n",
      "Columns: [ 1 1 1 2 2 3 3 3 4 4 4 5 5 5 5 5 5 6 6 6 7 7 8 8 9 9 9 9 9 9 10 10 10 11 11 11 12 12 12 13 13 13 14 14 14 14 12 4 3 2 2 2 2 1]\n",
      "Index: []\n"
     ]
    }
   ],
   "source": [
    "# Dump both label tables for a quick visual check\n",
    "for labels in (ytrain, ytest):\n",
    "    print(labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.feature_selection import RFECV\n",
    "from sklearn.svm import SVC\n",
    "\n",
    "# The targets are 14 nominal cancer classes, so the base estimator must be a\n",
    "# classifier rather than a regressor. A linear kernel is kept because it\n",
    "# exposes coef_, which recursive feature elimination uses to rank features.\n",
    "estimator = SVC(kernel=\"linear\")"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
